#!/usr/bin/env Rscript

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(optparse))

# Column layout of expected.csv: category, linter and a logical `expected` flag.
expected_spec <- cols(
  category = col_integer(),
  linter = col_character(),
  expected = col_logical()
)
expected <- read_csv("expected.csv", col_types = expected_spec)

# Column layout of actual.csv: issue, category, linter and a logical `actual`
# flag.
actual_spec <- cols(
  issue = col_character(),
  category = col_integer(),
  linter = col_character(),
  actual = col_logical()
)
actual <- read_csv("actual.csv", col_types = actual_spec)

# Attach the `expected` flag to every row of `actual`. Rows whose
# (category, linter) pair has no match in `expected` end up with NA there.
data <- actual |>
  left_join(expected, by = c("category", "linter"))

# Per-category summary table: how many rows are flagged `expected`, how many
# are flagged `actual`, and the ratio of the two.
expected_per_category <- data |>
  filter(expected) |>
  count(category, .drop = FALSE)

actual_per_category <- data |>
  filter(actual) |>
  count(category, .drop = FALSE)

# The left join keeps every category that has at least one expected row; a
# category with no `actual` rows would get NA for `n.actual`, so it is replaced
# by 0 before the ratio is computed.
left_join(
  expected_per_category,
  actual_per_category,
  by = "category",
  suffix = c(".expected", ".actual")
) |>
  mutate(
    n.actual = replace_na(n.actual, 0),
    pct = n.actual / n.expected
    # pct.actual = n.actual / 40
  ) |>
  print(n = Inf)

# Execution continues from here, so the consistency report and the plot that
# follow in this script are produced in the same run.

#### PERFORMANCE
#
# left_join(
#     data |> filter(expected) |> count(linter, .drop=FALSE), # expected
#     data |> filter(actual) |> count(linter, .drop=FALSE), # actual
#     by = c("linter"),
#     suffix = c(".expected", ".actual")
#   ) |>
#   mutate(
#     `pct.expected` = `n.actual` / `n.expected`,
#     `pct.actual` = `n.actual` / 40,
#   ) |>
#   print(n=Inf)

#### Inconsistent across categories
# df <- left_join(
#     data |> filter(expected) |> group_by(category, linter) |> summarize(n = n_distinct(issue)),
#     data |> filter(actual) |> group_by(category, linter) |> summarize(n = n_distinct(issue)),
#     by = c("category", "linter"),
#     suffix = c(".expected", ".actual")
#   ) |>
#   mutate(
#     `n.actual` = replace_na(`n.actual`, 0),
#     `pct` = `n.actual` / `n.expected`,
#   ) |>
#   filter(linter != "(any)") |>
#   arrange(linter, desc(pct)) |>
#   print(n=Inf)


# Rows of `data` that are flagged as expected.
expected_rows <- data |>
  filter(expected)

# Issues with more than one expected row whose `actual` value is identical on
# all of those rows. Only issues for which the `distinct` flag is TRUE are kept.
uniform_issues <- expected_rows |>
  group_by(issue) |>
  filter(n() > 1) |>
  summarize(distinct = n_distinct(actual) == 1) |>
  filter(distinct)

# Print every expected row that belongs to one of those issues.
uniform_issues |>
  inner_join(data, by = c("issue")) |>
  filter(expected) |>
  print(n = Inf)


### PLOT

# One synthetic "(any)" linter row per (issue, category): `expected` / `actual`
# are the result of any() over the rows for that pair. `.groups = "drop"`
# returns an ungrouped tibble, so no grouping leaks into later steps.
any_linter <- data |>
  group_by(issue, category) |>
  summarize(across(c(expected, actual), any), .groups = "drop") |>
  mutate(linter = "(any)")

# Append the "(any)" rows, move that level to the end of the `linter` factor,
# and derive a combined "expected/actual" label as a factor with a fixed level
# order.
data <- data |>
  bind_rows(any_linter) |>
  mutate(linter = fct_relevel(linter, "(any)", after = Inf)) |>
  unite(result, expected, actual, sep = "/", remove = FALSE) |>
  mutate(
    result = factor(
      result,
      levels = c("FALSE/TRUE", "TRUE/TRUE", "FALSE/FALSE", "TRUE/FALSE")
    )
  )

# Fill scale for the combined expected/actual label; drop = FALSE keeps unused
# factor levels in the legend.
outcome_fill <- scale_fill_brewer(
  name = "Expected/Actual",
  type = "qual",
  palette = "Spectral",
  direction = -1,
  drop = FALSE
)

# One facet column per category, each with its own x range and free panel size.
category_facets <- facet_grid(
  cols = vars(category),
  scales = "free_x",
  space = "free"
)

# Tile plot: issues on the x axis, linters (in reversed factor order) on the
# y axis, tiles coloured by `result`. The x tick labels are hidden.
tile_plot <- ggplot(data, aes(x = issue, y = fct_rev(linter), fill = result)) +
  geom_tile(show.legend = TRUE) +
  outcome_fill +
  scale_x_discrete(name = "Issue") +
  scale_y_discrete(name = "Linter") +
  category_facets +
  theme_bw(12) +
  theme(axis.text.x = element_blank(), legend.position = "bottom")

ggsave("plot2.pdf", plot = tile_plot, width = 8, height = 2.5)
